#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : 波士顿房价预测_L1正则化.py
# @Author: dongguangwen
# @Date  : 2025-02-06 14:37
# 0. Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, SGDRegressor, Lasso
from sklearn.metrics import mean_squared_error

# 1. Load the data set (the path is relative to the current working directory).
data = pd.read_csv('./data/波士顿房价.csv')

# 2. Split into train/test sets: every column except the last is used as a
#    feature, and the last column is used as the regression target.
features = data.iloc[:, :-1]
target = data.iloc[:, -1]
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=22,
)

# 3. Standardize: the scaler is fitted on the training split only, and the
#    same fitted scaler is then applied to both splits.
process = StandardScaler().fit(x_train)
x_train = process.transform(x_train)
x_test = process.transform(x_test)

# 4. Train a Lasso (L1-regularized) linear model.
model = Lasso(alpha=0.01).fit(x_train, y_train)

print("模型的权重系数为:\n", model.coef_)
print("模型的偏置为:\n", model.intercept_)

# 5. Predict on the held-out test split.
y_pred = model.predict(x_test)
print(y_pred)

# 6. Evaluate with mean squared error on the test split.
error_mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("L1正则化均方误差: ", error_mse)

# NOTE(review): the bare string literal below is never assigned or printed, so
# it has no effect at runtime. It appears to be console output pasted from a
# previous run of this script (coefficients, intercept, test-set predictions
# and MSE) -- confirm it still matches the current data and alpha before
# relying on the numbers.
"""
模型的权重系数为:
 [-0.6922603   1.08929703 -0.15889987  0.86070732 -1.98162634  2.73575564
 -0.1435678  -3.28646011  2.40532166 -1.54981379 -1.66181457  0.90270154
 -3.8045163 ]
模型的偏置为:
 22.57970297029703
[28.04422247 31.34919385 21.15774429 32.91049299 19.94150829 19.1949156
 21.07862094 19.43807315 19.58287935 32.49409351 20.98244284 27.71888683
 15.45772074 19.73295225 36.58390253 18.58690976  8.98503497 18.37625737
 30.49280642 24.30717408 19.23205714 33.70402271 29.80815408 17.66929638
 34.7184438  26.33855039 34.49810184 27.30777887 19.23502843 15.02769669
 30.48633827 15.33689678 37.08519534  7.34829368 16.21106452 17.36594449
  7.43732466 20.19225112 40.68902982 28.83658091 25.16636821 17.85168014
 39.42346648  6.96174994 21.99608271 25.11404292 20.29283673 20.39867157
 17.41934164 26.24209548  8.78255871 27.31089853 30.81096312 16.6805587
  9.37964812 35.09788057 32.02821115 22.00724918 17.44890656 21.98130033
 23.52142915 23.94477573 20.06461028 38.22083618 24.80942919 19.95124863
 14.01172728  6.75004947 42.18529629 21.89869641 16.92836073 22.45094493
 40.40301889 21.45672056 36.4788331  26.99690068 21.06094552 20.29258
 25.22431274 22.3458079  31.05087415 20.25143299 24.23229094 31.34127949
 26.76382535 21.0764222  28.87809906 21.85995099 26.37587563 19.57752574
 25.28031718 24.3561086  20.06375804 16.12613565 15.35805527 18.59194161
 24.59532435 16.56699196 21.10129015 26.51973261 20.95778387 18.01939752]
L1正则化均方误差:  20.8022675515861
"""
